# Author: Alfio Lazzaro, alazzaro@cray.com (2013)

# Use: module load PrgEnv-gnu
# Remember to remove any module specific for GPU, e.g. module unload craype-accel-nvidia35 cudatoolkit

#
# target compiler... these are the options used for building the library.
# They should be aggressive enough to e.g. perform vectorization for the specific CPU (e.g. -ftree-vectorize -march=native),
# and allow some flexibility in reordering floating point expressions (-ffast-math).
# Higher level optimisation (in particular loop nest optimization) should not be used.
#
# Compile command for the library itself: the ftn compiler wrapper at -O2 with
# loop unrolling, function inlining, vectorization and relaxed floating-point
# semantics (-ffast-math). -cpp runs the C preprocessor on the Fortran sources
# and -fopenmp enables OpenMP directives.
# NOTE(review): -march=native tunes for the CPU of the machine running the
# compiler -- confirm the build host matches the nodes the library will run on.
target_compile="ftn -O2 -funroll-loops -ffast-math -ftree-vectorize -cpp -finline-functions -fopenmp -march=native"

#
# target dgemm link options... these are the options needed to link blas (e.g. -lblas)
# blas is used as a fall back option for sizes not included in the library or in those cases where it is faster
# the same blas library should thus also be used when libsmm is linked.
#
# With the ftn wrapper, libsci is used as the blas library, so blas_linking does not need to be set here.
#

#
# host compiler... this is used only to compile a few tools needed to build
# the library. The library itself is not compiled this way.
# This compiler needs to be able to deal with some Fortran2003 constructs.
#
# Plain gfortran (not the ftn wrapper) at -O2; used only for the helper tools
# that drive the build, not for the library itself.
host_compile="gfortran -O2"

#
# Set the aprun command and its options for batch submission
#
# aprun options: -n 1 starts one process in total, -N 1 places one process per
# node, -d ${ntasks} reserves ${ntasks} CPUs for that process (e.g. for its
# threads), and -r 1 requests one core for core specialization.
# NOTE(review): assuming this file is sourced by a shell, ${ntasks} is expanded
# at the moment the assignment is read, so ntasks must already be set by the
# caller -- confirm where it is defined.
aprun_cmd="aprun -n 1 -N 1 -d ${ntasks} -r 1"
